Tian Zheng
September 18, 2018
knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.path='figs/',
echo=T, warning=FALSE, message=FALSE)if (!require("DT")) install.packages('DT')
if (!require("dtplyr")) install.packages('dtplyr')
if (!require("lubridate")) install.packages('lubridate')
if (!require("ggmap")) install.packages('ggmap')
if (!require("choroplethrZip")) {
# install.packages("devtools")
library(devtools)
install_github('arilamstein/choroplethrZip@v1.5.0')}
library(dtplyr)
library(dplyr)
library(DT)
library(lubridate)install.packages("shiny")
library(shiny)
runExample("01_hello")Shiny tutorials can be found at http://shiny.rstudio.com/articles/.
From NYC open data, we downloaded 2009 housing sales data of Manhattan. datatable from the DT package is a great tool for exploring data in your R notebook or presentation.
mh2009=read.csv(file="../data/ManhattanHousing.csv")
datatable(sample_n(mh2009, 50))mh2009=
mh2009%>%
filter(ZIP.CODE>0)%>%
mutate(region=as.character(ZIP.CODE))count.df=mh2009%>%
group_by(region)%>%
summarise(
value=n()
)
save(count.df, file="../output/count.RData")choroplethr“A choropleth map (from Greek χώρο (”area/region“) + πλήθος (”multitude“)) is a thematic map in which areas are shaded or patterned in proportion to the measurement of the statistical variable being displayed on the map, such as population density or per-capita income.”
if (!require("choroplethr")) install.packages("choroplethr")
if (!require("devtools")) install.packages("devtools")
library(devtools)
if (!require("choroplethrZip"))
devtools::install_github('arilamstein/choroplethrZip@v1.5.0')
if (!require("ggplot2")) devtools::install_github("hadley/ggplot2")
if (!require("ggmap")) devtools::install_github("dkahle/ggmap")library(choroplethrZip)
zip_choropleth(count.df,
title = "2009 Manhattan housing sales",
legend = "Number of sales",
county_zoom = 36061)We can obtain more geo information about individual sales.
library(ggmap)
library(dplyr)
mh2009.selgeo=
mh2009%>%
sample_n(100)%>%
select(starts_with("ADD"))%>%
mutate(ADDRESS_Ext=paste(ADDRESS, "New York, NY", sep=","))%>%
mutate_geocode(ADDRESS_Ext)
mh2009.selgeo=mh2009.selgeo%>%
na.omit()library(ggmap)
ggmap(get_map("Manhattan",source="google",
maptype = "hybrid",
zoom=12, color = "bw")) +
geom_point(data=mh2009.selgeo, aes(x=lon,y=lat), color='red') A shiny app needs two files. - ui.r a user-interface script - server.r a server script (the actual analysis)
mh2009.use=
mh2009%>%
mutate(sale.month=month(as.Date(SALE.DATE, "%m/%d/%y")))%>%
mutate(sale.price=ifelse(SALE.PRICE==0, NA, SALE.PRICE))%>%
mutate(footage=ifelse(GROSS.SQUARE.FEET==0, NA, GROSS.SQUARE.FEET))%>%
mutate(unit.price=sale.price/footage)%>%
mutate(bldg.type=substr(BUILDING.CLASS.CATEGORY, 1, 2))%>%
filter(bldg.type %in% c("10", "13", "25", "28"))%>%
arrange(bldg.type)
save(mh2009.use, file="../output/mh2009use.RData")man.nbhd=c("Central Harlem", "Chelsea and Clinton",
"East Harlem", "Gramercy Park and Murray Hill",
"Greenwich Village and Soho", "Lower Manhattan",
"Lower East Side", "Upper East Side", "Upper West Side",
"Inwood and Washington Heights")
zip.nbhd=list(1:length(man.nbhd))
zip.nbhd[[1]]=c(10026, 10027, 10030, 10037, 10039)
zip.nbhd[[2]]=c(10001, 10011, 10018, 10019, 10020)
zip.nbhd[[3]]=c(10036, 10029, 10035)
zip.nbhd[[4]]=c(10010, 10016, 10017, 10022)
zip.nbhd[[5]]=c(10012, 10013, 10014)
zip.nbhd[[6]]=c(10004, 10005, 10006, 10007, 10038, 10280)
zip.nbhd[[7]]=c(10002, 10003, 10009)
zip.nbhd[[8]]=c(10021, 10028, 10044, 10065, 10075, 10128)
zip.nbhd[[9]]=c(10023, 10024, 10025)
zip.nbhd[[10]]=c(10031, 10032, 10033, 10034, 10040)